*********************************************************************************
* Start from an empty Stata session (drops data, programs, stored results).
clear all
* Raise the variable limit: the wide-format steps below create several thousand dummy columns.
set maxvar 30000
* Close a log left open by an earlier run; capture suppresses the error when no log is open.
capture log close
* Do not pause output with --more-- prompts.
set more off

****************************************************************************************************
* 			Customize the paths and options:
*
* Replace each "..." placeholder with a real location before running.
* MY_TEMP_PATH, MY_OUT_PATH and MY_LOG_FILE are used further down in this script;
* MY_IN_PATH and MY_TEMP_PATH_2 are not referenced in the visible part of it.
****************************************************************************************************
global MY_IN_PATH   "..."
global MY_OUT_PATH  "..."
global MY_TEMP_PATH "..."
global MY_TEMP_PATH_2 "..."
global MY_LOG_FILE  "..."

* NOTE(review): -cd- without an argument only displays the working directory on
* Windows, but changes to the home directory on Mac/Unix -- confirm which is intended.
cd

* Open a plain-text log, overwriting any existing file at that location.
log using "${MY_LOG_FILE}", text replace

****************************************************************************************************

* Load the gvkey-ayear patent panel and attach the wide "not new class" indicators.
* File names are quoted so that a MY_TEMP_PATH containing spaces still works.
use "${MY_TEMP_PATH}/patents_temp2.dta", clear
* unmatched(both) keeps gvkey-ayear rows that appear in only one of the two files.
joinby gvkey ayear using "${MY_TEMP_PATH}/patent_notnew_class_indicator_wide.dta", unmatched(both)
* The match-status variable is not needed afterwards.
drop _merge

	* Set every notnew* indicator to 0 wherever it is missing
	* (e.g. rows that found no match in the indicator file).
	unab notnew_vars : notnew*
	foreach v of local notnew_vars {
		replace `v' = 0 if `v' >= .
	}
		
		
	* Generate dummies for entering a new technology class (classes 1-481).
	* new# = 1 when c# is positive and c#_cum5 equals 0
	* (presumably current-period vs. trailing five-year patent counts -- confirm).
	* Stata orders missing values above every number, so "c# > 0" on its own is
	* true for a missing c#; missing counts are therefore excluded explicitly.
	* Dummies are stored as byte (they only hold 0/1) to keep the wide file small.
	foreach num of numlist 1/481 {
		gen byte new`num' = (c`num' > 0 & !missing(c`num') & c`num'_cum5 == 0)
	}

*** Adjust the dummy variable by excluding the instances where the new technological class was not actually "new", based on proximity between classes ***

* Generate the indicator variables for classes that are missing from the indicator file.
* One all-zero placeholder per proximity criterion (>median / >75% / >90% / max;
* on average and within patent class), so the loops below can refer to every class 1-481.
	foreach i of numlist 4, 14, 15, 17, 18, 35, 42, 384, 374, 463, 470, 481 {
		foreach stub in notnew50 notnew50_class notnew75 notnew75_class notnew90 notnew90_class notnew_max {
			gen byte `stub'_cl`i' = 0
		}
	}

*** Exclude classes that are not actually "new" ***

* For each proximity criterion, new#_<crit> = 1 when the firm enters class #
* (new# == 1) and the matching "not new" indicator for that class is 0.
* Criteria: 50 / 75 / 90 = above median / 75% / 90% on average,
* *_class = the same thresholds within class, max = maximum (within class).
* The criterion loop is the outer one so variables are created in the same
* order as before: all classes for _50, then all classes for _50_class, and so on.
	foreach crit in 50 50_class 75 75_class 90 90_class max {
		* Indicator names follow notnew<crit>_cl#, except the max criterion,
		* which is stored as notnew_max_cl#.
		if "`crit'" == "max" {
			local ind "notnew_max"
		}
		else {
			local ind "notnew`crit'"
		}
		foreach num of numlist 1/481 {
			gen byte new`num'_`crit' = (new`num' == 1 & `ind'_cl`num' == 0)
		}
	}
* Keep only the identifiers and the newly generated dummies
keep gvkey ayear new*

* Rename variables to <prefix>_new<class>, so that -reshape long- below can use
* the trailing class number as j.
* The patterns use # (one or more digits) instead of *: "rename new* c_new*"
* would also catch new1_50, new1_max, ..., and the suffix renames would then
* produce names such as c50_c_new1 that match none of the reshape stubs.
rename new#           c_new#
rename new#_50        c50_new#
rename new#_50_class  c50_class_new#
rename new#_75        c75_new#
rename new#_75_class  c75_class_new#
rename new#_90        c90_new#
rename new#_90_class  c90_class_new#
rename new#_max       cmax_new#

*** Reshape into long format: one row per gvkey-ayear-class ***
* The class number at the end of each variable name becomes the new variable "class".
reshape long c_new c50_new c50_class_new c75_new c75_class_new c90_new c90_class_new cmax_new, i(gvkey ayear) j(class)
* File name is quoted so that a MY_TEMP_PATH containing spaces still works.
save "${MY_TEMP_PATH}/patents_level_1.dta", replace

* Restrict the long file to the gvkey-ayear-uspcid combinations present in patents_1_list.
* File names are quoted so that paths containing spaces still work.
rename class uspcid
joinby gvkey ayear uspcid using "${MY_TEMP_PATH}/patents_1_list", unmatched(both)
* Show the match status in the log before discarding unmatched rows.
tab _merge
* Keep only rows found in both files.
keep if _merge == 3
drop _merge
save "${MY_OUT_PATH}/patents_level_2.dta", replace